5 Bacterial genome statistics

# Load the genome, assembly and host metadata tables for the selected release.
# `release` is expected to be defined before this point.
genome_metadata <- read_tsv(str_c("data/", release, "_genome.tsv"))
assembly_metadata <- read_tsv(str_c("data/", release, "_assembly.tsv"))
host_metadata <- read_tsv(str_c("data/", release, "_host.tsv"))

# Build a named character vector (names = phylum, values = colour) for the
# phyla found in genome_metadata. The right join keeps every phylum present in
# genome_metadata, including those missing from the colour table (which then
# carry an NA colour).
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by = join_by(phylum)) %>%
  distinct(phylum, colors) %>%
  arrange(phylum) %>%
  pull(colors, name = phylum)

5.1 Genome quantity

# Total number of genomes (one row per genome in genome_metadata)
nrow(genome_metadata)
[1] 13833

5.2 Genome quality

# Mean and standard deviation of completeness and contamination over all
# genomes, rendered as a table. Column names are kept exactly as they appear
# in the rendered output.
tt(
  summarise(
    genome_metadata,
    completenes_mean = mean(completeness),
    completenes_sd = sd(completeness),
    contamination_mean = mean(contamination),
    contamination_sd = sd(contamination)
  )
)
completenes_mean completenes_sd contamination_mean contamination_sd
83.50943 15.27147 1.845634 2.074399
#Generate quality biplot
# Scatter of completeness (x) against contamination (y), one point per genome,
# coloured by phylum using the phylum_colors palette. The y limits are given
# as 10 -> 0, so the contamination axis is drawn reversed. The legend is
# hidden.
genome_biplot <- genome_metadata %>%
  select(genome_id, domain, phylum, completeness, contamination, genome_size) %>%
  ggplot(aes(x = completeness, y = contamination, colour = phylum)) +
  geom_point(size = 1, alpha = 0.7) +
  scale_color_manual(values = phylum_colors) +
  ylim(10, 0) +
  xlab("Completeness") +
  ylab("Contamination") +
  theme_classic() +
  theme(legend.position = "none")

#Generate contamination boxplot
# Marginal boxplot of contamination, using the same reversed 10 -> 0 y limits
# as the biplot. All axis decoration is blanked so that only the box remains.
genome_contamination <- ggplot(genome_metadata, aes(y = contamination)) +
  geom_boxplot(fill = "#cccccc", colour = "#999999") +
  ylim(10, 0) +
  theme_void() +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    # margins are (top, right, bottom, left): only a bottom margin is added
    plot.margin = unit(c(0, 0, 0.40, 0), "inches")
  )

#Generate completeness boxplot
# Marginal boxplot of completeness, with the x axis limited to 50-100.
# All axis decoration is blanked so that only the box remains.
genome_completeness <- ggplot(genome_metadata, aes(x = completeness)) +
  geom_boxplot(fill = "#cccccc", colour = "#999999") +
  xlim(50, 100) +
  theme_void() +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    # margins are (top, right, bottom, left): only a left margin is added
    plot.margin = unit(c(0, 0, 0, 0.50), "inches")
  )

#Render composite figure
# Arrange the three plots on a 12 x 12 grid:
#   - grob 1 (completeness boxplot) fills the top row, columns 1-11
#   - grob 2 (biplot) fills rows 2-12, columns 1-11
#   - grob 3 (contamination boxplot) fills rows 2-12, column 12
# The top-right cell is intentionally empty. It is marked with NA, the
# documented empty-cell value for layout_matrix, rather than with an id (4)
# that has no corresponding entry in `grobs`.
grid.arrange(
  grobs = list(genome_completeness, genome_biplot, genome_contamination),
  layout_matrix = rbind(
    c(rep(1, 11), NA),
    # eleven identical rows: biplot in columns 1-11, contamination in column 12
    matrix(rep(c(rep(2, 11), 3), times = 11), nrow = 11, byrow = TRUE)
  )
)

5.3 Taxonomic statistics

#phylum
# Per-phylum summary: number of genomes, mean completeness, mean contamination
# and mean genome_size divided by 1e6 (presumably bp -> Mbp; confirm units).
# Genomes whose phylum is the empty placeholder "p__" are excluded, and rows
# are sorted by genome count, largest first.
# NOTE(review): rows whose phylum is the literal string "nan" are not removed
# by this filter and therefore appear in the table.
genome_metadata %>%
  group_by(phylum) %>%
  summarise(
    genomes = n(),
    completenes = mean(completeness),
    contamination = mean(contamination),
    size = mean(genome_size) / 1e6
  ) %>%
  filter(phylum != "p__") %>%
  arrange(desc(genomes)) %>%
  tt()
phylum genomes completenes contamination size
p__Bacillota_A 6029 83.66600 1.8519325 2.6764713
p__Bacteroidota 4301 82.61607 1.9322897 3.1244179
p__Bacillota 937 85.12057 2.0372423 1.9982295
p__Pseudomonadota 848 83.65697 1.7335684 2.2655061
p__Desulfobacterota 342 82.34488 1.4075263 2.7559354
p__Verrucomicrobiota 278 88.90504 0.8747950 2.4597815
p__Bacillota_C 217 87.40447 1.3593180 2.1676157
p__Fusobacteriota 215 79.36158 2.9872605 1.8014199
p__Campylobacterota 185 91.94984 0.5614054 1.6161529
p__Cyanobacteriota 131 84.78725 1.8406870 1.7740854
p__Actinomycetota 89 75.60730 2.4719775 1.9512960
p__Bacillota_B 66 79.31000 1.2919394 2.3504127
p__Spirochaetota 34 88.47882 0.7054706 1.9236368
p__Deferribacterota 32 84.84844 1.2626875 1.9807231
p__Halobacteriota 26 79.40154 0.9999231 1.5482744
nan 21 57.32810 6.5904762 2.2008697
p__Elusimicrobiota 19 83.18316 0.5752105 1.3912318
p__Patescibacteria 18 82.69833 2.1566667 0.7058587
p__Synergistota 16 84.26063 1.4668750 1.9546572
p__Acidobacteriota 4 81.94750 2.3075000 3.9519935
p__Chlamydiota 4 94.57250 0.4872500 1.1729767
p__Chloroflexota 4 67.88500 1.7650000 2.7801572
p__Myxococcota 3 75.54333 3.4300000 6.9908893
p__Thermoplasmatota 3 81.71667 0.6920000 1.2594070
p__Methanobacteriota 2 93.89000 0.1550000 1.9168890
p__Bdellovibrionota 1 96.59000 1.3200000 2.4054180
p__Desulfobacterota_F 1 99.99000 1.2000000 3.6243310
p__Desulfobacterota_G 1 81.00000 5.4100000 2.3275200
p__Fibrobacterota 1 76.19000 2.0900000 5.6243300
p__J088 1 58.55000 7.3500000 2.1950540
p__JAKLEM01 1 50.37000 2.7500000 1.5253600
p__Nanoarchaeota 1 90.32000 0.2600000 1.1657700
p__Planctomycetota 1 93.11000 1.7300000 4.3173220
#genus
# Per-genus summary: number of genomes, mean completeness, mean contamination
# and mean genome_size divided by 1e6 (presumably bp -> Mbp; confirm units).
# Genomes whose genus is the empty placeholder "g__" are excluded, and rows
# are sorted by genome count, largest first.
# NOTE(review): rows whose genus is the literal string "nan" are not removed
# by this filter and therefore appear in the table.
genome_metadata %>%
  group_by(genus) %>%
  summarise(
    genomes = n(),
    completenes = mean(completeness),
    contamination = mean(contamination),
    size = mean(genome_size) / 1e6
  ) %>%
  filter(genus != "g__") %>%
  arrange(desc(genomes)) %>%
  tt()
genus genomes completenes contamination size
g__Bacteroides 782 78.33046 2.7439923 4.0009306
g__Parabacteroides 530 83.27357 1.9446698 4.0925221
g__Odoribacter 393 83.65000 1.8165903 3.2201619
g__Phocaeicola 376 81.57056 2.5304628 3.7949477
g__Alistipes 302 83.68404 2.5063113 2.3133999
g__Mucinivorans 298 85.13017 1.3451007 1.9634117
g__Acetatifactor 236 84.55886 2.0868475 3.1500363
g__Prevotella 169 82.46012 1.6331243 2.8287474
g__Eisenbergiella 166 85.81470 2.1805602 3.1225606
g__JAAYNV01 166 90.83753 1.1252048 3.5637427
g__Akkermansia 156 90.83218 0.6747756 2.5919467
g__CAG-95 148 88.17736 1.8279122 3.6271573
g__Roseburia 139 92.77223 2.0899424 3.6256912
g__Alloprevotella 110 84.58818 1.4283000 2.3205209
g__Sutterella 110 79.61727 2.4599182 1.9038248
g__Parabacteroides_B 109 85.93239 1.4079358 3.6262743
g__Helicobacter_J 108 94.49046 0.5139630 1.6525751
g__JADFUS01 101 83.58069 1.7368317 1.9462928
g__Fusobacterium_B 99 78.06949 3.4891919 1.4979327
g__Bilophila 97 82.98887 1.2721340 2.9527382
g__Faecalibacterium 95 89.42579 1.4865579 2.3461926
g__Ventrimonas 92 85.84087 1.8352500 3.2875434
g__CAG-485 89 82.15955 1.6241910 2.6924061
g__Dielma 88 84.93807 2.8190909 2.9218847
g__UBA866 86 85.53233 0.6344070 2.2192333
g__Anaerobiospirillum 85 77.45506 1.7055294 2.8527684
g__Faecousia 85 83.13506 2.6253765 2.3809077
g__Negativibacillus 81 81.46185 1.4045556 2.6802667
g__Hungatella_A 80 80.82887 1.9934250 3.4562112
g__Kineothrix 80 89.97200 1.2764250 3.4972581
g__Fusobacterium_A 75 80.06187 2.6042800 2.1932724
g__Rikenella 75 81.77347 1.6146133 2.3120919
g__Anaerotruncus 73 78.51507 2.0466712 2.8334480
g__Clostridium_Q 71 81.96479 2.0313521 3.3676804
g__Coproplasma 69 87.66942 1.3636377 1.7947947
g__Desulfovibrio 69 82.01797 0.9959130 2.6519717
g__MGBC136627 68 88.06206 1.7638235 3.5698897
g__Mycoplasmoides 67 93.36910 1.1217910 0.9221261
g__Phascolarctobacterium_A 66 92.03152 1.0318788 1.9133610
g__Velocimicrobium 64 81.50922 1.5529688 3.4432223
g__Breznakia 63 84.85667 2.5348730 2.3763616
g__Enterocloster 63 75.35508 2.0939683 3.5403101
g__CAG-873 59 83.39458 1.1828814 2.2938315
g__Duncaniella 58 86.78741 1.4717586 2.8216755
g__Blautia_A 57 78.78474 2.5613509 2.7172184
g__Mediterranea 57 72.76281 1.9669825 2.5648671
g__Blautia 56 77.95143 2.0490893 2.3701946
g__Gallimonas 56 81.03089 1.7190179 1.5258936
g__UBA3282 51 82.45765 2.5130588 2.8398454
g__UBA7173 51 84.33098 1.3861569 2.3581050
g__Paraprevotella 49 82.23653 1.6722449 2.6908366
g__Butyribacter 48 86.93104 1.4707917 2.6354504
g__Clostridium 46 77.90761 3.6103913 2.8257833
g__Mycoplasma_L 45 82.54400 1.4944444 0.8095026
g__Sarcina 44 80.23545 3.5454773 2.6169732
g__Tidjanibacter 43 84.50977 1.2661860 1.8775005
g__Fimenecus 42 89.46048 0.8717619 1.9771575
g__Lachnotalea 42 89.20786 1.5045476 3.4696346
g__Oscillibacter 42 80.45357 1.8935476 2.4644769
g__RGIG4733 42 87.44619 1.4109286 3.7190687
g__Egerieousia 41 85.61780 0.5395122 2.1987519
g__Thomasclavelia 41 85.09366 2.7437317 3.0183990
g__COE1 40 83.61400 1.5067000 2.7887079
g__Choladocola 40 83.92400 2.7617250 2.9251094
g__Dysosmobacter 40 76.40050 4.1273500 2.1293909
g__Lacrimispora 40 84.73675 1.6506250 3.7337944
g__Aeromonas 38 82.06474 2.9341053 3.4772702
g__UBA4293 38 90.73132 0.8545000 3.2863527
g__Ruthenibacterium 37 83.08595 1.4613784 2.3621107
g__CAG-590 36 84.28139 2.1854167 2.9131451
g__Intestinimonas 36 67.58972 3.7120278 2.4002201
g__JAGBWK01 36 88.61028 0.8640833 2.2896926
g__Angelakisella 35 82.57371 1.8402000 2.1102356
g__CAG-269 34 76.29647 3.1020294 1.4356728
g__MGBC140009 34 84.71118 2.3186176 3.7114479
g__Mailhella 34 78.87941 2.4362059 2.4408852
g__CAZU01 33 79.14788 1.8684545 1.3614661
g__Coprobacillus 33 83.75333 1.8093939 3.0630959
g__Gemmiger 33 82.31758 1.4976061 2.3064288
g__NHYM01 32 90.92844 0.4242187 1.4602849
g__Anaeroplasma 31 93.09806 0.9320968 1.8039064
g__Escherichia 31 84.29097 1.7174194 3.9955748
g__Ventricola 31 78.08355 2.3232581 2.3429855
g__C-19 29 82.50138 2.4608621 3.1923226
g__JALFVM01 29 80.08793 2.4764483 3.4476421
g__Eubacterium_R 28 82.10036 1.5371071 1.8415248
g__Phascolarctobacterium 28 90.41821 1.5412857 1.9465343
g__RUG626 27 83.41259 2.4808519 2.1199066
g__Schaedlerella 27 76.37185 2.5315926 2.1058506
g__Spyradosoma 27 92.63148 0.8955185 2.0503313
g__UMGS1251 26 83.22077 1.9021538 3.5291067
g__14-2 25 90.15320 2.0857200 3.4587848
g__Agathobacter 25 88.42320 1.6148800 2.4598529
g__CHH4-2 25 78.60160 2.2800400 3.3287936
g__Choladousia 25 77.94320 2.8195600 2.9779826
g__Copromonas 25 77.40440 2.2032000 2.9744614
g__Hungatella 25 70.40920 3.0590000 3.7537133
g__Anaerotignum 24 76.03750 0.9433750 1.8758788
g__Coprococcus 24 87.86208 1.8955000 2.3096639
g__Eubacterium_F 24 88.00542 1.3922917 2.3872639
g__Hespellia 24 90.11583 1.4644583 3.0224436
g__Cryptobacteroides 23 90.74261 0.9896522 2.2886778
g__Fournierella 23 73.29261 1.7945652 2.0709552
g__Hydrogenoanaerobacterium 23 82.05000 1.4367391 2.5756478
g__Limiplasma 23 85.40217 1.9344783 2.9201134
g__Peptacetobacter 23 66.78609 6.3539130 1.3394770
g__Aphodousia 22 79.60318 2.9750000 1.4761859
g__CAJOIG01 22 80.08455 2.5090909 1.9237138
g__UBA3402 22 79.38045 1.8536364 2.5131430
g__UBA4292 22 86.04545 1.7050909 2.5733936
g__Buttiauxella 21 92.28000 2.7843810 4.3693456
g__CALUXS01 21 93.35714 0.9500000 1.4981888
g__Helicobacter_C 21 85.56476 0.7808095 1.6630911
g__MGBC165282 21 87.17952 1.6013333 3.7568950
g__OM05-12 21 80.16048 1.3484286 2.6119786
g__Onthomonas 21 73.55238 3.7084762 1.8695060
g__SZUA-378 21 86.10286 1.7407619 3.5192275
nan 21 57.32810 6.5904762 2.2008697
g__Cetobacterium 20 82.42600 2.2130000 1.7502279
g__Collinsella 20 68.50700 3.7920000 1.2146614
g__Avoscillospira 19 76.18842 2.6952632 2.0515814
g__Brevinema 19 87.14000 0.4433158 1.5556709
g__Faecalimonas 19 76.65263 2.3373684 1.9298566
g__RGIG6463 19 81.38474 0.7295789 2.3652443
g__Ligilactobacillus 18 93.03889 1.4155556 2.0286136
g__UBA3263 18 81.68278 0.7034444 1.7265587
g__Caccovivens 17 83.94176 1.1797647 1.0229758
g__Enterenecus 17 78.93353 2.6711765 2.9658340
g__Lawsonibacter 17 74.15353 2.4225294 2.0747672
g__Malacoplasma 17 88.28882 0.1523529 0.8257163
g__Pelethenecus 17 92.13941 1.6034118 1.6939882
g__CAJLXD01 16 87.38875 1.7173750 1.6397237
g__Lachnospira 16 81.09125 2.1463125 2.2896428
g__Limivicinus 16 77.16688 1.9211875 2.0074292
g__Methanimicrococcus 16 81.64687 0.4523750 1.5468957
g__Paramuribaculum 16 84.08062 1.7264375 2.2698737
g__RGIG8745 16 83.20250 1.8636875 3.2401001
g__Scatousia 16 81.75938 1.7710000 1.7558833
g__Wujia 16 92.33750 0.6005000 2.4458048
g__Allobaculum 15 80.32733 0.6946667 1.7607492
g__CAJGBR01 15 94.39133 0.5913333 2.0574078
g__Fimivivens 15 74.84467 1.9449333 1.9233645
g__Gabonibacter 15 74.62867 1.7666667 1.5227652
g__HGM05232 15 80.43800 0.7560000 2.4911500
g__Hafnia 15 97.14800 0.8912000 4.5309756
g__JAAYQI01 15 70.40267 1.7640000 2.1705564
g__JAIHAL01 15 86.08533 1.0728667 2.3547987
g__MGBC143606 15 82.94867 1.4291333 2.5496931
g__Megamonas 15 86.81267 0.7893333 1.8396857
g__RGIG3002 15 85.78133 1.7126000 3.0181283
g__RGIG7193 15 84.87267 2.0974000 3.0412837
g__Ruminococcus_D 15 83.59733 1.7346667 2.5077879
g__UBA7185 15 77.39200 2.4129333 3.0214479
g__Enterobacter 14 86.10786 2.5121429 4.1201701
g__RGIG4057 14 81.93214 2.8901429 2.7730806
g__Anaerorhabdus 13 87.90846 2.2769231 1.9936744
g__Intestinibacillus 13 82.73154 1.7016154 2.3811686
g__JAGNZR01 13 79.38538 2.4630769 1.9243046
g__Limenecus 13 85.33462 1.5905385 1.9508987
g__Mobilisporobacter 13 77.81462 1.0338462 3.3343508
g__Pseudoflavonifractor 13 73.34615 3.3313077 2.7327018
g__Ruminococcus_B 13 80.66154 2.4323077 2.1689964
g__Scatacola_A 13 94.56154 0.8500000 1.7181672
g__Scatocola 13 90.11923 1.0049231 1.5943258
g__V9D3004 13 86.80923 0.4715385 2.0260051
g__CAG-303 12 79.13333 0.9313333 2.1966957
g__CAG-41 12 80.17250 1.8965833 2.0270892
g__CAG-632 12 87.22250 1.1827500 2.6463514
g__Lawsonia 12 85.84250 0.1876667 1.6176053
g__RUG115 12 94.12417 1.5739167 2.8114713
g__Ruminococcus 12 83.18250 1.3184167 2.2477094
g__UBA1436 12 76.77083 0.5188333 1.2959140
g__UMGS1601 12 86.77750 1.7325000 1.5879301
g__WRDF01 12 77.01500 1.2186667 1.9408515
g__Avidehalobacter 11 82.76545 1.0967273 1.3185645
g__Butyricicoccus 11 78.50909 2.3300000 1.8126992
g__CAG-115 11 86.46455 1.7650000 2.2535266
g__CAG-353 11 79.36000 0.6951818 2.0303291
g__CAG-475 11 92.92727 0.5990000 1.6750680
g__Faecalibacillus 11 81.96091 3.1748182 2.0685129
g__HGM12587 11 78.72364 2.6268182 2.4847537
g__JAGAJR01 11 87.12273 1.4683636 1.5099542
g__JAJQCX01 11 93.42818 0.9560909 2.1504757
g__Limisoma 11 91.05000 0.8918182 2.2485101
g__MGBC120314 11 85.83000 2.1864545 2.5837639
g__MGBC124762 11 85.50091 1.8490909 1.6956766
g__NSJ-61 11 82.36909 3.3135455 3.2853601
g__Plesiomonas 11 96.30909 0.9975455 3.0882576
g__RUG762 11 83.39273 0.7753636 2.3945672
g__Ruminococcus_E 11 86.79636 1.4196364 1.8274238
g__Scatovivens 11 79.70909 4.3981818 1.5802639
g__Turicibacter 11 84.59909 2.8272727 2.4089091
g__UBA1366 11 93.70545 0.1300000 2.4731303
g__UBA3210 11 90.98455 1.1710909 1.6164445
g__Vibrio 11 85.75818 1.6739091 3.2846805
g__Victivallis 11 79.49636 2.2709091 1.9266078
g__Anaerovorax 10 74.51600 2.9513000 2.4406158
g__Butyricimonas 10 79.05000 1.9265000 3.5509095
g__CAG-882 10 90.20700 1.5950000 2.5726870
g__CALXXL01 10 79.31500 1.1430000 1.6799653
g__Gallispira 10 79.01400 0.8825000 1.7711217
g__JAMPGF01 10 79.85800 1.6032000 1.9668257
g__MGBC131033 10 82.75300 0.7994000 3.2036434
g__Marvinbryantia 10 79.79100 1.9050000 2.3818468
g__Monoglobus 10 78.41300 1.4600000 2.0817256
g__Mucispirillum 10 90.61400 0.9850000 2.1189145
g__Phocaeicola_A 10 76.59100 1.6730000 2.8213249
g__RGIG8482 10 80.09400 1.5901000 1.4847350
g__RUG11237 10 86.87400 2.9199000 3.2022827
g__Scatenecus 10 90.96700 0.8973000 1.7606572
g__UBA2942 10 83.23500 1.0328000 2.5558953
g__UBA7488 10 74.56400 2.1603000 1.2515580
g__WRHT01 10 77.74800 0.9126000 2.5760728
g__Amedibacillus 9 77.57333 2.6465556 2.3740843
g__CALWRD01 9 82.82222 1.0577778 1.9211042
g__Citrobacter 9 96.53444 1.4914444 4.9930733
g__Coprovivens 9 91.48000 3.2896667 1.6716806
g__Emergencia 9 83.66222 2.1676667 2.3280821
g__Gallibacteroides 9 84.24111 1.7688889 3.3193410
g__Gastranaerophilus 9 90.79000 1.2432222 1.7385116
g__MGBC101980 9 85.99111 0.6412222 1.8073716
g__Muribaculum 9 94.57222 1.0854444 2.7427323
g__RUG11200 9 90.18444 1.1166667 2.1175361
g__Sphingobacterium 9 85.83000 2.8977778 3.5378608
g__Stoquefichus 9 86.26889 1.2946667 2.8594062
g__VSOB01 9 85.79667 1.9776667 2.2551070
g__W0P33-017 9 75.43111 1.9594444 2.2320907
g__Acinetobacter 8 86.95500 2.3887500 2.8099321
g__Anaerostipes 8 95.33625 2.1637500 2.9547254
g__Avirikenella 8 85.40125 0.6075000 2.0423169
g__CAG-103 8 85.61000 2.0267500 2.0896017
g__CAG-345 8 83.67375 1.1837500 1.0782471
g__CAG-411 8 83.07375 3.1503750 3.3120712
g__CAG-826 8 78.53000 0.5166250 1.0761334
g__CALURL01 8 75.55875 2.0738750 1.8507124
g__Gallalistipes 8 80.83375 2.7625000 2.1984795
g__HGM11604 8 84.47875 2.7836250 2.1633904
g__Paracoccus 8 82.14750 3.6432500 3.4389716
g__Parasutterella 8 78.27500 1.8747500 2.0430726
g__Pyramidobacter 8 83.19500 1.4475000 1.8790534
g__RF16 8 92.65000 0.9027500 2.2293974
g__Ruminiclostridium_E 8 95.08125 1.7415000 2.3879136
g__UBA2882 8 96.81375 2.0061250 3.1634651
g__UBA3855 8 85.68625 0.9150000 1.9672994
g__UBA5026 8 71.98000 1.3760000 1.0391106
g__UMGS27 8 85.86375 0.5930000 2.7725934
g__Agathobaculum 7 80.66429 1.2732857 1.8468374
g__Aminipila 7 65.84429 0.9242857 1.9814247
g__Bifidobacterium 7 77.75143 3.2428571 1.7703733
g__Butyricicoccus_A 7 83.87429 1.5264286 1.8705946
g__CAG-1782 7 75.51286 0.8022857 1.6242547
g__CAG-45 7 92.67714 2.2241429 2.7197579
g__Campylobacter_D 7 97.15571 0.4785714 1.6426069
g__Enterousia 7 83.04000 1.0925714 0.8671740
g__Flavobacterium 7 92.39000 0.5457143 2.4081123
g__HGM11386 7 66.51286 0.9540000 1.9037451
g__Klebsiella 7 86.03286 2.2102857 4.7065624
g__Lactococcus 7 92.60429 0.2594286 2.0698919
g__MGBC105563 7 81.80429 2.0500000 1.3936926
g__MGBC116941 7 81.65000 0.8015714 1.6002653
g__Merdiplasma 7 84.17286 1.6585714 2.5816581
g__Methanocorpusculum 7 76.21000 1.1114286 1.4404811
g__Nanosyncoccus 7 78.73857 1.9728571 0.6424371
g__QXXE01 7 84.92286 2.0905714 2.3193721
g__Spyradomonas 7 89.26571 2.0331429 1.9821764
g__Streptococcus 7 89.80429 0.7385714 1.6322636
g__UBA2658 7 80.17714 0.9951429 1.8232519
g__UMGS1663 7 72.89286 3.8432857 1.1852456
g__WRKB01 7 70.70143 1.0100000 2.2559680
g__Acutalibacter 6 72.77833 3.3851667 2.0457885
g__Aquirickettsiella 6 92.20333 0.5300000 1.4524000
g__Avilachnospira 6 89.08833 2.0816667 1.9312978
g__Bacteroides_G 6 85.11167 1.1333333 2.8740010
g__Budvicia 6 97.23167 0.7200000 4.9492155
g__CAAEEV01 6 76.04833 1.2863333 2.1261743
g__CAG-267 6 85.30500 0.4736667 1.5094952
g__CAG-56 6 74.37000 3.2266667 2.4492095
g__CAG-603 6 77.86833 2.9916667 2.0778473
g__CAJMNU01 6 80.00000 3.3713333 2.6483398
g__CAKVBE01 6 93.98833 0.5483333 1.2549825
g__CAMBRQ01 6 68.54667 2.5985000 2.1149285
g__Caccovicinus 6 79.73000 4.4755000 3.3505713
g__Corynebacterium 6 95.23500 0.6166667 2.9240602
g__Craterilacuibacter 6 78.19833 1.6433333 2.4728848
g__Fimimonas 6 94.67500 0.7883333 1.6774983
g__Fimimorpha 6 77.44333 2.1108333 2.6436418
g__Fimivicinus 6 90.49500 1.6315000 2.6179075
g__Fimousia 6 88.05000 3.7366667 2.9866347
g__Galligastranaerophilus_A 6 90.35167 1.2448333 1.8708660
g__HGM11788 6 78.64500 1.7405000 1.9771743
g__Helicobacter_A 6 71.06333 1.1016667 1.6720998
g__Lactobacillus 6 77.14500 1.5933333 1.4695768
g__Limadaptatus 6 88.71167 0.7150000 1.5034233
g__MGBC133411 6 91.97167 0.5825000 1.6246583
g__Pelethomonas 6 77.19333 2.8505000 1.7738373
g__Phocea 6 78.76000 0.3800000 2.3616727
g__Pseudomonas_E 6 73.11667 1.9236667 4.1134410
g__RUG14156 6 76.58833 3.1843333 1.5746832
g__SIG332 6 70.90667 1.1346667 2.8128192
g__Staphylococcus 6 77.58000 3.4650000 2.3945450
g__Stercorousia 6 93.04000 0.3431667 1.7025937
g__UBA1213 6 78.93833 0.7800000 1.9680687
g__UBA7050 6 77.24167 1.2411667 2.5507447
g__UMGS1590 6 80.11500 1.3383333 1.5804592
g__Ventrenecus 6 72.16500 4.2231667 1.2198980
g__Avimicrobium 5 78.05800 3.3280000 1.3810848
g__C-53 5 89.79600 1.9618000 2.6802446
g__CAG-196 5 91.40600 2.6480000 1.8225938
g__CAG-288 5 81.60200 2.7342000 1.3730546
g__CAKSQF01 5 78.52200 1.3288000 2.0100842
g__CALXRO01 5 75.59800 1.3176000 0.9162908
g__Clostridium_AQ 5 89.23400 1.3518000 3.7075122
g__Dietzia 5 65.72800 3.4980000 2.4245102
g__Enterococcus_B 5 83.77000 2.9840000 2.2895470
g__Eubacterium_G 5 90.79600 1.3840000 2.6178020
g__Evtepia 5 78.01800 2.3560000 2.0962234
g__Faecimonas 5 82.05400 3.5714000 1.3842976
g__Faeciplasma 5 93.68800 0.6060000 2.2825556
g__Harryflintia 5 80.60600 0.9074000 2.8972694
g__Hepatoplasma 5 90.49800 0.2704000 0.6231124
g__Holdemanella 5 80.62600 4.2340000 1.8072704
g__JAFLUQ01 5 95.24400 1.2542000 1.9725872
g__JAHHTP01 5 75.45400 0.8560000 2.0786310
g__MGBC163490 5 77.85200 2.4320000 2.1987082
g__Massiliimalia 5 88.99200 0.5920000 2.1494490
g__Onthocola_B 5 84.46600 2.8500000 1.2800136
g__Protoclostridium 5 82.00800 1.5642000 1.8492688
g__Pseudoruminococcus 5 89.23200 2.0720000 1.8611478
g__RGIG3040 5 97.39400 0.8642000 2.5891560
g__RGIG6919 5 99.63000 0.9120000 2.6111662
g__Romboutsia_C 5 77.98200 3.2934000 2.1121234
g__SIG299 5 83.56400 1.8448000 2.8279066
g__UBA1224 5 85.63200 1.5040000 2.1756520
g__UBA1794 5 84.96800 0.9356000 2.5147674
g__UBA5578 5 90.46200 4.3400000 1.3501078
g__UBA6857 5 74.29600 3.1540000 1.4470482
g__UBA9732 5 91.61800 0.6700000 1.9234586
g__Amedibacterium 4 82.13750 2.3450000 2.0251692
g__Aphodocola 4 87.71250 1.7350000 1.1110805
g__Avigastranaerophilus 4 73.88250 3.2435000 1.6054120
g__Butyrivibrio_A 4 88.34250 0.7570000 1.9563928
g__CAG-510 4 80.43500 0.9660000 2.3178183
g__CAIPUE01 4 80.56500 0.4625000 1.5747565
g__CAJTFG01 4 82.45500 3.5237500 1.7213820
g__CAKRHR01 4 75.13750 0.7187500 1.5102438
g__CALVGN01 4 82.83000 3.9250000 1.2572817
g__Caccocola 4 88.53500 0.9950000 1.9669778
g__Coprobacter 4 78.73750 1.2475000 2.7038865
g__DTU064 4 86.46000 1.0125000 1.7178478
g__Eubacterium_I 4 71.91250 4.6975000 2.1710727
g__Extibacter 4 81.18250 2.3687500 3.1953317
g__Harrysmithimonas 4 87.94000 2.0750000 2.2354865
g__Helicobacter_B 4 88.63750 1.3825000 1.7077433
g__IOR16 4 87.13750 0.9137500 2.2664415
g__ISDg 4 92.15750 1.0257500 2.2986802
g__JAAWBF01 4 71.58750 2.9405000 2.2255128
g__JAHHSE01 4 71.69750 3.3570000 1.6763702
g__Lactonifactor 4 85.68000 2.7100000 3.6724078
g__Limosilactobacillus 4 84.22500 3.0500000 1.7117478
g__Luteimonas_D 4 75.56500 2.4700000 2.1514993
g__MGBC107952 4 70.80500 2.2402500 1.0953345
g__MGBC114844 4 75.18750 1.3825000 2.5196693
g__Merdicola 4 77.20500 3.4107500 1.6477073
g__Muricomes 4 78.61750 3.4662500 4.4242112
g__Oliverpabstia 4 75.78750 2.1542500 2.8218315
g__Pseudoscilispira 4 69.95000 3.8690000 1.9631340
g__RGIG1896 4 69.17000 0.4595000 1.8427130
g__RGIG9287 4 76.25500 0.5290000 1.2484727
g__SFTH01 4 64.97000 2.3650000 2.0539450
g__Salmonella 4 92.86500 2.2800000 4.5734605
g__Scandinavium 4 70.35000 5.0692500 3.4031733
g__UBA5884 4 80.21250 0.4500000 2.0662410
g__UBA6398 4 87.35000 1.3335000 2.3518997
g__UBA7477 4 95.67000 0.5750000 2.1938167
g__UBA940 4 66.11750 3.2475000 2.0416700
g__UMGS1202 4 72.48000 2.8457500 2.1953048
g__Ureaplasma 4 91.80750 2.9627500 0.8600967
g__Zag111 4 86.14250 2.4197500 1.7899070
g__1XD42-69 3 71.61000 1.1433333 2.2772987
g__Alistipes_A 3 63.87333 0.6833333 1.4833253
g__Anaerobiospirillum_A 3 84.31000 0.5766667 2.1252407
g__Anaerosphaera 3 98.75333 0.9000000 1.6050477
g__Bariatricus 3 81.43000 1.8856667 3.0292130
g__Brachybacterium 3 64.48667 4.7233333 2.6186257
g__CAG-266 3 99.85333 0.1966667 1.8609273
g__CAG-452 3 76.11667 2.5300000 1.1779373
g__CAG-465 3 74.60000 3.7193333 1.4152633
g__CAG-977 3 82.86333 2.9430000 2.0519867
g__CALXSC01 3 90.36667 1.1800000 1.4654037
g__Caccenecus 3 75.47667 4.2956667 1.0729603
g__Catenibacterium 3 88.83333 1.1033333 1.8893763
g__Cryptoclostridium 3 82.58667 0.7400000 1.6726777
g__Eggerthella 3 75.66667 1.9503333 2.6020303
g__F23-B02 3 91.86333 0.5033333 1.8480567
g__Faecalitalea 3 81.71333 1.6866667 1.4002217
g__Faecenecus 3 86.95000 2.7866667 1.2110683
g__Fimicola 3 93.99000 0.7566667 2.2741243
g__Helicobacter_G 3 100.00000 0.0000000 1.5149717
g__Howiella 3 68.67000 0.4400000 1.5884423
g__JAFLTL01 3 76.91000 2.5076667 2.0471750
g__JAFUYI01 3 87.06000 0.8333333 1.3605633
g__JAJQCF01 3 95.24000 0.1766667 1.9402947
g__JALEPO01 3 83.35333 2.0266667 2.3942543
g__Marseille-P3106 3 62.16667 4.4983333 1.6624623
g__Merdenecus 3 86.74000 2.7706667 2.2526657
g__Merdimorpha 3 80.58000 0.7300000 1.5979100
g__Microbacterium 3 83.60667 3.0493333 2.3846987
g__Moheibacter 3 97.86667 0.1933333 2.4978600
g__Morganella 3 97.21667 1.8966667 3.8163673
g__Onthovivens 3 88.70667 1.6066667 1.5028063
g__Paenalcaligenes 3 96.02000 1.4700000 2.3365020
g__Paraclostridium 3 82.09000 5.7133333 2.6121447
g__Peptostreptococcus 3 100.00000 0.4400000 2.1119113
g__RGIG4140 3 83.61333 1.8100000 1.3726980
g__RGIG6307 3 74.63667 3.1600000 2.5706307
g__SIG200 3 84.60667 1.2033333 1.2456207
g__SIG32 3 61.83000 5.4996667 1.4257573
g__Scatavimonas 3 94.25667 0.6403333 2.4870867
g__Spiro-02 3 97.97000 1.6043333 3.6046057
g__UBA1174 3 99.71000 0.2366667 1.6677177
g__UBA11774 3 95.11000 1.5036667 2.7622537
g__UBA11957 3 74.92667 3.6066667 2.7283987
g__UBA1221 3 75.62667 2.0630000 1.7075177
g__UBA1417 3 76.62667 2.2133333 1.9504873
g__UBA4372 3 78.80667 0.7123333 2.2924333
g__12844 2 79.00500 6.0600000 2.1416225
g__Acaricomes 2 83.10500 0.3850000 1.9028050
g__Adamsella 2 97.65500 2.4650000 2.0072200
g__Avelusimicrobium 2 82.78000 1.6265000 1.2236500
g__Beduini 2 84.23000 2.4380000 2.2955885
g__Brachyspira 2 61.97000 1.5700000 2.0049985
g__Bullifex 2 90.23500 0.8800000 2.2656315
g__CAG-1031 2 91.04500 0.8350000 2.4178530
g__CAG-273 2 89.40500 2.3500000 1.3872975
g__CAG-449 2 96.87000 0.3200000 1.7364425
g__CAG-492 2 75.24000 4.7050000 1.4435255
g__CAG-552 2 71.13500 5.4550000 1.2167080
g__CAIVKH01 2 72.66000 0.7850000 4.9133520
g__CAJKWP01 2 67.99500 1.2455000 0.9406700
g__CAKQDS01 2 95.05000 2.4215000 2.1544285
g__CAKTEE01 2 91.24000 0.8500000 2.4502275
g__CAKUFA01 2 60.76000 5.9550000 2.0217610
g__CALTSX01 2 93.87500 0.8955000 1.2786980
g__CALWPC01 2 78.11500 1.1110000 1.8187095
g__CALXFK01 2 82.88000 1.0650000 1.8610855
g__CCUG-7971 2 61.44500 7.9750000 2.0359945
g__Catellicoccus 2 81.70000 0.9850000 0.9953495
g__Cellulosilyticum 2 97.52000 2.9700000 3.8422710
g__Cetobacterium_A 2 92.11500 3.2800000 2.5676295
g__Clostridium_H 2 73.49000 7.8800000 1.2955745
g__Clostridium_J 2 71.68000 0.8250000 2.0860825
g__Clostridium_N 2 85.87000 1.4940000 1.9298470
g__Copranaerobaculum 2 86.36500 3.2515000 2.6776350
g__DXYV01 2 69.56500 3.8600000 2.3761105
g__Deefgea 2 64.91000 2.1900000 1.8032995
g__Dialister 2 90.27000 1.0100000 2.1800845
g__E4-60 2 84.33500 3.2300000 1.7521075
g__Enterococcus 2 89.61000 0.5100000 2.7818710
g__Enterococcus_A 2 51.88000 5.9160000 2.5312965
g__Enterococcus_D 2 91.22500 1.0255000 3.1736100
g__FLUQ01 2 87.40000 2.0750000 3.1562155
g__Faecisoma 2 93.01500 1.8965000 1.4232745
g__Faecivivens 2 76.76500 1.5000000 2.4873980
g__Fluviibacter 2 73.96000 0.2360000 1.4886875
g__Frisingicoccus 2 83.62000 1.5335000 1.9329985
g__Fructobacillus 2 70.85000 1.3335000 1.0711470
g__GCA-900066495 2 80.64500 4.2450000 2.5389705
g__HGM05190 2 95.21500 1.8650000 2.7495455
g__HGM13233 2 88.11000 1.4355000 1.8582025
g__HGM16780 2 74.29500 3.8250000 1.1528865
g__Helicobacter_D 2 72.05000 1.0400000 1.2014595
g__Ignatzschineria 2 84.60000 3.3950000 2.2587250
g__JAEDES01 2 73.47500 5.8150000 1.0426695
g__JAGPHI01 2 92.36000 0.0800000 2.4903040
g__JAHHTG01 2 94.32000 1.2100000 1.5924910
g__JAJQBJ01 2 77.29500 4.7500000 1.8839255
g__JAJQEJ01 2 80.75500 1.8500000 3.2014105
g__Jeotgalibaca 2 77.31500 3.4000000 2.3815695
g__KLE1615 2 90.58500 3.8300000 3.4542625
g__LD21 2 84.63500 0.6050000 3.6092415
g__Lysinibacillus 2 95.22000 0.0220000 4.2795200
g__MGBC162267 2 86.14000 0.9600000 1.1269010
g__Mammaliicoccus 2 81.60500 2.0950000 2.2090605
g__Novisyntrophococcus 2 68.38000 2.2400000 1.9206375
g__Onthenecus 2 52.91500 7.1500000 3.0607900
g__Onthousia 2 69.13500 0.7850000 0.9030700
g__Onthovicinus 2 78.60000 0.1550000 1.9316855
g__Pantoea 2 75.57000 2.8620000 5.0193540
g__Proteus 2 99.62500 0.2950000 3.7820085
g__RGIG1902 2 78.71000 2.0075000 1.6448660
g__RGIG3102 2 60.67000 0.2600000 1.4511170
g__RGIG3926 2 92.44500 0.6650000 1.7516480
g__RGIG4790 2 93.55500 2.0150000 0.9155400
g__RGIG5270 2 92.51000 0.2050000 3.4094050
g__RGIG7114 2 88.96000 1.9265000 1.9151880
g__RUG11788 2 94.06500 1.1100000 1.5345410
g__Raoultibacter 2 63.28000 4.4025000 1.7240865
g__Robinsoniella 2 99.51000 1.2525000 6.0692840
g__Romboutsia 2 65.42500 4.9550000 2.5898900
g__Ruminococcus_C 2 87.03000 1.6600000 2.2927460
g__Shewanella 2 95.18000 1.9850000 4.2699450
g__Smithella 2 94.25000 5.0250000 3.3927685
g__TWA4 2 78.67000 2.5550000 2.8554845
g__Tyzzerella 2 84.23000 0.6750000 1.4449830
g__UBA11358 2 86.94000 3.1400000 3.7858265
g__UBA11524 2 86.84000 2.4685000 2.9045465
g__UBA2475 2 85.26500 2.3150000 3.1131990
g__UBA3903 2 80.10500 0.0835000 1.8982870
g__UBA6265 2 74.64500 1.6650000 2.6058865
g__UMGS1585 2 73.24000 4.1195000 1.8699425
g__UMGS2016 2 73.07500 1.4840000 1.0016345
g__UMGS2069 2 82.80500 1.1100000 0.9754800
g__UMGS75 2 91.98000 0.8400000 1.5794495
g__UMGS995 2 88.62000 2.2650000 1.3178675
g__Vagococcus 2 66.47500 5.3350000 1.8343795
g__Veillonella_A 2 93.89500 0.3450000 2.2712830
g__WQYD01 2 63.44000 2.4500000 1.4795965
g__Aerococcus 1 59.02000 8.6300000 1.3373320
g__Aeromicrobium 1 99.89000 0.6400000 2.8213450
g__Agrococcus 1 98.43000 0.0300000 2.5044110
g__Alangreenwoodia 1 96.93000 0.1200000 3.1569430
g__Albibacterium 1 99.97000 0.3000000 3.7405060
g__Alcaligenes 1 56.61000 6.1300000 2.7206030
g__Aliidiomarina 1 52.20000 3.1400000 1.4865380
g__Anaerocaecibacter 1 78.76000 2.2360000 1.5971480
g__Arachnia 1 56.85000 0.2300000 1.7251140
g__Bacilliculturomica 1 97.00000 2.0950000 1.2819470
g__Bacteroides_E 1 99.43000 0.4500000 2.6663310
g__Bacteroides_H 1 95.02000 0.5660000 3.4673400
g__Barb7 1 98.58000 0.6200000 6.1182190
g__Barnesiella 1 99.35000 0.0900000 3.0426880
g__Borkfalkia 1 78.95000 4.4800000 1.7753920
g__Bradyrhizobium 1 62.81000 1.7500000 5.0405010
g__Brevibacterium 1 65.01000 1.8600000 3.2356890
g__Buchnera 1 95.05000 0.7100000 0.5147370
g__C7867-006 1 90.27000 8.1700000 0.7971490
g__CAG-1193 1 64.91000 0.6700000 1.2539700
g__CAG-170 1 85.03000 0.1900000 2.0396770
g__CAG-177 1 81.87000 1.8200000 1.7078730
g__CAG-217 1 52.02000 0.5200000 1.0088070
g__CAG-272 1 78.92000 0.6900000 1.5878050
g__CAG-313 1 69.08000 2.0900000 1.4483860
g__CAG-314 1 87.89000 1.0700000 1.3074640
g__CAG-433 1 92.35000 2.4400000 1.3596480
g__CAG-582 1 92.69000 0.0000000 1.2631680
g__CAG-793 1 89.49000 1.9100000 1.8937350
g__CAIQJJ01 1 53.49000 2.3600000 3.4070360
g__CAIQQL01 1 90.32000 0.2600000 1.1657700
g__CAJAUT01 1 87.93000 7.6400000 0.7850580
g__CAJMSU01 1 88.06000 1.0400000 1.0920350
g__CAKOLD01 1 62.29000 0.5800000 0.9057680
g__CAKVLS01 1 69.22000 0.5900000 0.7301980
g__CALUVN01 1 84.24000 0.6100000 1.7451340
g__CALVUN01 1 57.65000 9.6490000 2.3278350
g__CALVXC01 1 92.39000 1.9100000 1.0055050
g__CALWOS01 1 53.90000 1.0900000 1.6640440
g__CALXEL01 1 79.08000 2.0740000 2.2882710
g__CALXJL01 1 75.30000 1.7600000 1.1251280
g__CALXUC01 1 50.60000 6.6430000 0.9825170
g__CALXVS01 1 70.63000 1.5300000 1.0296420
g__Caccosoma 1 89.73000 1.6850000 1.2940300
g__Cardinium 1 84.75000 0.5000000 0.7669200
g__Chiayiivirga 1 92.95000 0.2100000 2.9693770
g__Citrobacter_A 1 97.08000 3.1420000 4.8801630
g__Cloacibacillus 1 99.94000 1.4500000 3.6037150
g__Clostridioides 1 99.30000 0.0000000 4.2129860
g__Clostridium_AH 1 60.75000 2.9200000 1.9427030
g__Clostridium_AI 1 98.11000 4.7700000 4.1531360
g__Coprococcus_A 1 82.82000 1.0800000 2.5061970
g__DUUI01 1 97.85000 0.0800000 1.5162980
g__DYRC01 1 81.39000 3.7300000 4.0181950
g__Duodenibacillus 1 94.85000 1.1100000 2.1803970
g__Dwaynesavagella 1 98.05000 2.0300000 2.1247500
g__Dysgonomonas 1 98.90000 0.5640000 2.5705980
g__Edwardsiella 1 90.83000 0.5200000 2.8654900
g__Enterococcus_J 1 50.72000 1.1700000 0.9908680
g__Erysipelothrix 1 99.95000 3.2900000 2.1357920
g__FEN-1139 1 97.42000 7.9000000 11.3623140
g__FEN-1279 1 99.55000 0.1900000 3.4961820
g__Fermentimonas 1 99.97000 0.1000000 3.9489400
g__Fimiplasma 1 96.94000 0.0500000 1.5553540
g__Firm-04 1 67.01000 2.3400000 1.2865310
g__Frigididesulfovibrio 1 94.73000 0.0700000 3.0212370
g__Gallibacter 1 96.32000 0.2600000 1.8428150
g__Geddesella 1 96.73000 0.0300000 2.1005630
g__Gordonia 1 71.73000 1.6800000 2.9947940
g__HGM10766 1 92.36000 0.7300000 1.3536230
g__HGM11525 1 67.71000 3.4000000 2.5100260
g__HGM12545 1 76.13000 3.4800000 2.2671200
g__HGM13010 1 69.41000 1.5200000 1.8758610
g__Halomonas 1 54.30000 8.2900000 2.0029490
g__Halopseudomonas 1 72.55000 2.3400000 2.4694990
g__Heteroruminococcus 1 51.25000 0.6700000 1.4534730
g__Holdemania 1 94.52000 0.0000000 3.0449160
g__Holophaga 1 71.42000 4.6500000 3.2369970
g__Hominisplanchenecus 1 52.63000 9.1700000 2.0338570
g__Hylemonella 1 56.38000 0.4800000 1.2174180
g__IGN3 1 94.67000 1.7400000 4.0465950
g__JAAWPK01 1 79.96000 9.5700000 1.1489760
g__JAAYKM01 1 93.11000 4.5600000 2.4479890
g__JAAYSD01 1 97.14000 0.8000000 2.2232730
g__JAAZDN01 1 81.34000 1.5600000 1.6726130
g__JABFSR01 1 79.06000 2.8400000 4.0676450
g__JABUSF01 1 67.08000 0.0500000 1.0973490
g__JACRCG01 1 99.99000 1.2000000 3.6243310
g__JACRJP01 1 64.10000 0.4300000 0.5241010
g__JADLHS01 1 69.19000 5.2800000 4.5299160
g__JAERTD01 1 78.85000 1.0800000 2.4014670
g__JAEWLZ01 1 62.06000 1.9580000 1.0931320
g__JAEZVV01 1 79.93000 1.0200000 2.0140720
g__JAFAFE01 1 84.78000 1.4900000 2.4148500
g__JAFGIC01 1 79.70000 5.2500000 3.5743770
g__JAFGVL01 1 50.93000 8.6800000 2.5758390
g__JAFTYR01 1 100.00000 0.6200000 3.0245750
g__JAGNPU01 1 91.60000 1.4900000 2.7235160
g__JAGOBP01 1 68.76000 1.3700000 2.5018200
g__JAHHTM01 1 50.03000 8.9230000 4.3682840
g__JAHHUI01 1 97.74000 0.6500000 1.5288480
g__JAHZHG01 1 69.92000 5.7600000 2.6796420
g__JAILHT01 1 80.35000 0.6900000 3.0262580
g__JAJPXI01 1 97.85000 1.9500000 1.9234720
g__JAJQAW01 1 65.82000 2.8700000 1.8167530
g__JAKALQ01 1 50.86000 2.9100000 1.5432920
g__JALENY01 1 81.94000 0.0200000 2.5501840
g__JALRDG01 1 86.80000 0.3000000 0.7550770
g__JAMLIO01 1 96.59000 1.3200000 2.4054180
g__JJ008 1 100.00000 0.3300000 4.2966590
g__Jeotgalicoccus 1 98.56000 4.5100000 2.0592920
g__Kluyvera 1 99.63000 0.5800000 4.6952270
g__Kosakonia 1 97.88000 4.7600000 5.4786960
g__Lachnoclostridium 1 92.39000 1.3420000 4.1176900
g__Lachnoclostridium_B 1 90.38000 1.4610000 2.8312080
g__Lactococcus_A 1 99.74000 1.0900000 2.5777500
g__Lapidilactobacillus 1 100.00000 0.9200000 1.8477010
g__Lentilactobacillus 1 65.07000 3.1900000 1.0477630
g__Leuconostoc 1 74.42000 6.7100000 1.4773370
g__Limnohabitans 1 94.85000 4.9300000 2.3250530
g__Limousia 1 89.53000 1.3420000 1.6878580
g__Luteolibacter 1 74.78000 0.9600000 3.1301400
g__MGBC102946 1 53.58000 0.2500000 0.6341430
g__MGBC108787 1 88.20000 1.1230000 1.2333140
g__MGBC113645 1 63.63000 1.0780000 1.8645750
g__MGBC140090 1 95.54000 1.0200000 2.1087060
g__MGBC143601 1 95.88000 0.4400000 1.1444400
g__MGBC163016 1 67.37000 2.2900000 1.3527210
g__Marinobacter 1 50.85000 2.0600000 1.5248470
g__Mediterraneibacter 1 55.07000 4.0200000 1.3066050
g__Methanobacterium_A 1 96.44000 0.1100000 2.2352770
g__Methanomethylophilus 1 98.38000 0.8060000 1.6299110
g__Methanoplasma 1 95.48000 1.1600000 1.2943750
g__Methanoregula 1 58.96000 9.9300000 1.7568370
g__Methanosphaera 1 91.34000 0.2000000 1.5985010
g__Methanothrix 1 96.26000 0.3900000 2.3085830
g__Methyloglobulus 1 66.74000 2.0400000 2.2778330
g__Mycetocola 1 93.00000 4.1700000 3.4344310
g__NC2004 1 53.18000 3.3800000 1.6265820
g__NIC37A-2 1 55.66000 0.1600000 2.5307600
g__OLB17 1 89.84000 1.5100000 3.9879030
g__Oligella 1 99.74000 3.5500000 2.2122110
g__Onthoplasma 1 97.69000 0.2100000 1.1555950
g__Ornithomonoglobus 1 82.73000 0.6700000 1.7929430
g__Ornithomonoglobus_A 1 51.34000 7.2500000 1.7475840
g__Oxalobacter 1 99.94000 0.1900000 2.5254150
g__Paenibacillus 1 98.92000 0.0000000 3.8814710
g__Paludibacter 1 100.00000 3.2200000 4.5095440
g__Paratractidigestivibacter 1 68.21000 0.6600000 1.3958120
g__PeH17 1 96.56000 0.2600000 1.7954600
g__Proteiniphilum 1 99.96000 0.5500000 2.9177370
g__Providencia 1 91.23000 2.5800000 4.2267620
g__Pseudochrobactrum 1 51.22000 4.7400000 1.9998690
g__Pseudocitrobacter 1 96.28000 8.7140000 4.8484100
g__Pseudomonas 1 76.97000 0.2750000 4.2230230
g__Pusillimonas_B 1 99.99000 0.6700000 3.9399620
g__RACS-047 1 65.45000 1.4300000 1.5278680
g__RFTN01 1 81.02000 0.9200000 0.7832210
g__RGIG2774 1 99.33000 0.1700000 2.0010970
g__RGIG3091 1 92.13000 1.6850000 1.2119560
g__RGIG3701 1 93.11000 1.7300000 4.3173220
g__RGIG4074 1 81.92000 0.8060000 1.6084430
g__RGIG4097 1 94.96000 0.0200000 1.5882000
g__RGIG423 1 53.64000 0.7380000 1.1266110
g__RGIG5057 1 94.78000 0.1200000 1.3024720
g__RGIG7389 1 71.24000 3.2900000 2.0355290
g__RGIG8607 1 84.23000 1.5800000 2.8970540
g__RPPU01 1 92.58000 2.0000000 4.3118760
g__RUG11130 1 91.69000 0.2300000 2.1081430
g__RUG11198 1 63.63000 9.1520000 1.7405860
g__RUG11890 1 79.65000 1.6800000 1.0271300
g__RUG12438 1 91.36000 2.6900000 1.2015840
g__RUG12867 1 84.80000 1.2350000 1.3100890
g__RUG14305 1 55.47000 8.9500000 2.9488890
g__RUG14891 1 54.46000 8.7550000 3.7555800
g__RUG591 1 92.44000 2.0900000 1.5980810
g__Rhodoferax_C 1 98.54000 2.8300000 5.2482630
g__Romboutsia_D 1 72.82000 5.5700000 2.3550210
g__Ruania 1 92.31000 2.5000000 4.2623200
g__SIG230 1 85.17000 0.4000000 1.8732640
g__SIG308 1 95.87000 1.8300000 3.4201330
g__SIG603 1 65.91000 1.0060000 1.8239100
g__SKHV01 1 98.80000 1.0200000 5.6958050
g__Scatomorpha 1 58.59000 2.4600000 1.4164590
g__Scatosoma 1 52.43000 0.0300000 0.9484970
g__Schmidhempelia 1 99.97000 0.0400000 1.7264330
g__Schwartzia 1 52.37000 0.1700000 0.7808460
g__Scybalousia 1 95.84000 1.3400000 1.3365280
g__Serratia_A 1 100.00000 1.3400000 5.8208240
g__Slackia_A 1 63.54000 0.4800000 1.1501420
g__Stenotrophomonas 1 87.83000 0.6350000 3.6664510
g__Stutzerimonas 1 55.33000 5.6800000 2.0687660
g__Symbiothrix 1 95.27000 0.1700000 2.5798760
g__Syntrophorhabdus 1 81.00000 5.4100000 2.3275200
g__Taishania 1 81.78000 2.0400000 2.7747980
g__Terracidiphilus 1 81.62000 1.9100000 4.2092490
g__Thiodictyon 1 51.80000 1.7500000 2.7653580
g__Thiopseudomonas 1 99.45000 3.0200000 2.7944710
g__UBA10108 1 83.73000 2.2300000 0.8763760
g__UBA11452 1 55.89000 2.6900000 3.2786740
g__UBA1394 1 50.00000 9.0400000 3.0054700
g__UBA1405 1 50.63000 0.3100000 1.7731710
g__UBA1752 1 75.44000 1.6200000 1.3302750
g__UBA2192 1 58.89000 1.3700000 1.2361280
g__UBA2664 1 97.46000 0.4600000 3.4316890
g__UBA2813 1 82.64000 1.9900000 1.6820780
g__UBA2883 1 80.37000 2.0600000 1.6062150
g__UBA3305 1 50.33000 8.1400000 1.6613950
g__UBA3789 1 93.82000 5.1200000 1.6781770
g__UBA3961 1 97.88000 1.3800000 3.6846550
g__UBA4636 1 59.95000 0.0000000 1.0069370
g__UBA6345 1 93.11000 0.2680000 1.2527190
g__UBA710 1 98.31000 0.7700000 0.7858270
g__UBA738 1 67.50000 0.1100000 1.1998320
g__UBA7405 1 78.78000 2.2400000 3.7474390
g__UBA9973 1 100.00000 0.4100000 0.6731830
g__UMGS1124 1 86.95000 1.2100000 1.2401560
g__UMGS1271 1 81.36000 0.3000000 1.0862590
g__UMGS1696 1 96.30000 0.0200000 2.5538520
g__UMGS1766 1 71.53000 1.4540000 2.1316140
g__UMGS1781 1 100.00000 3.1600000 1.6879910
g__UMGS1994 1 57.91000 0.5100000 0.7209690
g__UMGS363 1 91.75000 0.0800000 2.6035780
g__UMGS687 1 69.42000 0.8060000 0.7448230
g__UMGS946 1 88.78000 0.8060000 1.8872360
g__UMGS973 1 86.09000 1.9300000 1.5602950
g__Vitreimonas 1 57.65000 1.1400000 1.7915160
g__Wohlfahrtiimonas 1 97.13000 1.7300000 2.0760790
g__Wolbachia 1 98.71000 0.0000000 1.3209310
g__Zhenhengia 1 98.22000 0.6300000 3.4406380
# Top 50 genera ranked by number of genomes.
# Genomes whose genus label is the bare "g__" prefix (no genus assignment)
# are excluded before ranking. The previous version kept 100 genera, which
# contradicted both this comment and the object name.
genus_top50 <- genome_metadata %>%
  filter(genus != "g__") %>%
  count(genus, name = "genomes", sort = TRUE) %>%
  slice_head(n = 50) %>%
  pull(genus)

5.4 New taxa discovery

# Species-level novelty: genomes whose species label is the bare "s__" prefix
# are tallied as "new", every other genome as "known". Percentages are
# relative to the total number of genomes in the catalogue.
genome_metadata %>%
  count(
    novelty = if_else(species == "s__", "new", "known"),
    name = "total_genomes"
  ) %>%
  mutate(percentage = (total_genomes / nrow(genome_metadata)) * 100) %>%
  tt()
novelty total_genomes percentage
known 1667 12.05089
new 12166 87.94911
# Genus-level novelty: genomes whose genus label is the bare "g__" prefix
# are tallied as "new", every other genome as "known". Percentages are
# relative to the total number of genomes in the catalogue.
genome_metadata %>%
  count(
    novelty = if_else(genus == "g__", "new", "known"),
    name = "total_genomes"
  ) %>%
  mutate(percentage = (total_genomes / nrow(genome_metadata)) * 100) %>%
  tt()
novelty total_genomes percentage
known 11616 83.97311
new 2217 16.02689
# Order-level novelty: genomes whose order label is the bare "o__" prefix
# are tallied as "new", every other genome as "known". Percentages are
# relative to the total number of genomes in the catalogue.
genome_metadata %>%
  count(
    novelty = if_else(order == "o__", "new", "known"),
    name = "total_genomes"
  ) %>%
  mutate(percentage = (total_genomes / nrow(genome_metadata)) * 100) %>%
  tt()
novelty total_genomes percentage
known 13824 99.93493819
new 9 0.06506181
# Species-level novelty per host species, most novel hosts first.
# The assembly and summary tables are joined in (via assembly_id and
# rawdata_id) before grouping by host_species. A genome counts as new when
# its species label is the bare "s__" prefix.
genome_metadata %>%
  left_join(assembly_metadata, by = "assembly_id") %>%
  left_join(summary_table, by = "rawdata_id") %>%
  group_by(host_species) %>%
  summarise(
    total_genomes = n(),
    new_genomes = sum(species == "s__"),
    percentage_new = (new_genomes / total_genomes) * 100
  ) %>%
  arrange(desc(percentage_new)) %>%
  tt()
host_species total_genomes new_genomes percentage_new
Podarcis muralis 2670 2604 97.52809
Podarcis gaigeae 1280 1246 97.34375
Podarcis milensis 590 570 96.61017
Lissotriton helveticus 1590 1526 95.97484
Sciurus carolinensis 1686 1616 95.84816
Calotriton asper 745 708 95.03356
Podarcis pityusensis 1046 974 93.11663
Podarcis liolepis 232 214 92.24138
Podarcis filfolensis 693 637 91.91919
Trichosurus vulpecula 61 54 88.52459
Lepus europaeus 832 711 85.45673
Sciurus vulgaris 1033 747 72.31365
Psittacula echo 123 66 53.65854
Canis familiaris 1252 493 39.37700

5.5 Genus - host heatmap

# Long-format table of genome counts per phylum / genus / host species,
# sorted from the most to the least populated combination. Genomes whose
# genus label is the bare "g__" prefix are left out.
genus_host <- genome_metadata %>%
  left_join(assembly_metadata, by = "assembly_id") %>%
  left_join(summary_table, by = "rawdata_id") %>%
  filter(genus != "g__") %>%
  count(phylum, genus, host_species, name = "genomes", sort = TRUE)

# Host breadth of each genus: number of host species in which the genus was
# recovered (one row of genus_host per genus/host pair), widest first.
# .groups = "drop" returns an ungrouped tibble; previously the result stayed
# grouped by phylum, which silently changes how later verbs behave and was
# inconsistent with genus_host, which is explicitly ungrouped.
genus_host_spread <- genus_host %>%
  group_by(phylum, genus) %>%
  summarise(host_species = n(), .groups = "drop") %>%
  arrange(desc(host_species))
# Reference tree read from the local bac120 r214 tree file.
gtdb_tree <- read_tree("data/bac120_r214.tree")

# Matching taxonomy table: the file has no header, so readr names the two
# columns X1 (genome accession) and X2 (semicolon-delimited lineage string);
# the lineage is then split into one column per taxonomic rank.
gtdb_metadata <- read_tsv(
  "data/bac120_taxonomy_r214.tsv.gz",
  col_names = FALSE
) %>%
  rename(genome = X1, classification = X2) %>%
  separate(
    classification,
    into = c("domain", "phylum", "class", "order", "family", "genus", "species"),
    sep = ";"
  )

# One reference genome per genus present in the catalogue, restricted to
# genomes that are actual tips of the reference tree.
# The representative is chosen deterministically (first matching row per
# genus) instead of by random sampling, so the derived tree is identical on
# every render without depending on the RNG state.
represented_genera <- gtdb_metadata %>%
  filter(
    genus %in% genus_host$genus,
    genome %in% gtdb_tree$tip.label
  ) %>%
  group_by(genus) %>%
  slice_head(n = 1) %>%
  ungroup()
  
# Prune the reference tree down to the selected representative genomes.
ehi_mags_tree <- keep.tip(gtdb_tree, tip = represented_genera$genome)

# Relabel each tip with the genus of its representative genome: look up every
# tip label in represented_genera and take the genus found at that position,
# which keeps the labels in tree tip order.
ehi_mags_tree$tip.label <- represented_genera$genus[
  match(ehi_mags_tree$tip.label, represented_genera$genome)
]
# Wide genus-by-host table of genome counts: genera become row names, each
# host species becomes a column, and combinations missing from genus_host
# are left as NA by pivot_wider().
genus_host2 <- genus_host %>%
  select(genus, host_species, genomes) %>%
  pivot_wider(names_from = "host_species", values_from = "genomes") %>%
  column_to_rownames("genus")

# Genus-by-host heatmap of genome counts, faceted by host_class.
# The y axis follows the tip order of ehi_mags_tree.
# NOTE(review): genera that are not among ehi_mags_tree$tip.label turn into
# NA when the factor is built -- confirm whether they should be dropped
# before plotting.
genus_host %>%
  left_join(host_metadata, by = "host_species") %>%
  mutate(genus = factor(genus, levels = ehi_mags_tree$tip.label)) %>%
  ggplot(aes(x = host_species, y = genus, fill = genomes)) +
  geom_tile() +
  facet_nested(. ~ host_class, space = "free", scales = "free") +
  scale_fill_gradient(low = "grey", high = "#2a94d1", na.value = "white") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Single-column tile strip colouring each genus by its phylum, with genera
# ordered by the tip order of ehi_mags_tree.
# The genus/phylum pairs come straight from genus_host: the earlier version
# downloaded the colour table again and right-joined it, but kept only the
# genus and phylum columns, so the download added nothing except a network
# dependency. Fill colours are taken from the existing phylum_colors vector.
# NOTE(review): genera that are not among ehi_mags_tree$tip.label turn into
# NA when the factor is built -- confirm whether they should be dropped.
genus_host %>%
  distinct(genus, phylum) %>%
  mutate(
    genus = factor(genus, levels = ehi_mags_tree$tip.label),
    xaxis = NA
  ) %>%
  ggplot(aes(x = xaxis, y = genus, fill = phylum)) +
  geom_tile() +
  scale_fill_manual(values = phylum_colors)